import enchant
import re

# Matches any single character that is neither a CJK ideograph in the range
# U+4E00-U+9FA5 nor an ASCII letter.  Inside a character class only the
# *leading* "^" negates; any later "^" is a literal caret, so the class must
# not repeat it.  (Append "0-9" to the class to keep digits as well.)
_NON_LETTER_RE = re.compile(r"[^\u4e00-\u9fa5a-zA-Z]")


def filter_str(desstr: str, restr: str = '') -> str:
    """Replace every non-letter character of *desstr* with *restr*.

    Characters that are kept unchanged are ASCII letters (a-z, A-Z) and
    Chinese characters in the range U+4E00-U+9FA5.  Everything else --
    digits, punctuation, whitespace, symbols such as "^" or "#" -- is
    substituted.

    Args:
        desstr: The text to clean.
        restr: Replacement for each removed character; the default empty
            string simply deletes them.

    Returns:
        The cleaned string.
    """
    return _NON_LETTER_RE.sub(restr, desstr)
# Demo: split a passage of English text (copied from the web) into words and
# keep only the ones that the en_US spelling dictionary accepts.
p = "Nowadays, Chinese, our mother tongue, #define #include< = ( =" \
    "issss enjoying an unprecedented popularity in the world." \
    " Over two hundred Confucius Institutes have been set up in sixty-nine countries," \
    "and many foreigners are attracted by the old, 1234" \
    " mysterious and charmin... "

# Named `spell_checker` rather than `dict` so the builtin is not shadowed.
spell_checker = enchant.Dict("en_US")

words = []  # accepted words, in order of appearance
# A token is a maximal run of characters other than space, "." and ",".
# Stray symbols inside a token ("#", "<", "=", "-", digits, ...) are stripped
# afterwards by filter_str, so they need no special handling while splitting.
for token in re.split(r"[ .,]+", p):
    w = filter_str(token)
    # Tokens made only of symbols/digits become empty after filtering; skip
    # them instead of passing an empty string to the spell checker.
    if w != "" and spell_checker.check(w):
        words.append(w)

print(words)
 